Tensorboard
In [46]:
import time
import os
import pandas as pd
# Run identity: "<project>_<step>_<local timestamp>".
project_name = 'Dog_Breed_Identification'
step_name = 'Train-Predict-XGBoost'
time_str = time.strftime("%Y%m%d_%H%M%S", time.localtime())
run_name = '_'.join([project_name, step_name, time_str])
print('run_name: ' + run_name)

# Log / model / output directories all live directly under the working directory.
cwd = os.getcwd()
log_path, model_path, output_path = (
    os.path.join(cwd, sub_dir) for sub_dir in ('log', 'model', 'output')
)
print('log_path: \t' + log_path)
print('model_path: \t' + model_path)
print('output_path: \t' + output_path)
In [27]:
# Read input/labels.csv from under the working directory and preview it.
labels_csv = os.path.join(cwd, 'input', 'labels.csv')
df = pd.read_csv(labels_csv)
print('lables amount: %d' % len(df))
df.head()
Out[27]:
In [28]:
import h5py
import numpy as np
from sklearn.utils import shuffle
# Load the stored 'train' / 'test' feature arrays of every backbone file and
# concatenate them along the last axis into one matrix per split.
np.random.seed(2017)

x_train = []
y_train = None  # taken from the 'train_labels' dataset of the feature files
x_val = []      # placeholders; the validation split is produced in a later cell
y_val = {}
x_test = []

cwd = os.getcwd()
feature_date = 20171026  # date suffix shared by all feature files read here
feature_cgg16 = os.path.join(cwd, 'model', 'feature_VGG16_{}.h5'.format(feature_date))
feature_cgg19 = os.path.join(cwd, 'model', 'feature_VGG19_{}.h5'.format(feature_date))
feature_resnet50 = os.path.join(cwd, 'model', 'feature_ResNet50_{}.h5'.format(feature_date))
feature_xception = os.path.join(cwd, 'model', 'feature_Xception_{}.h5'.format(feature_date))
feature_inception = os.path.join(cwd, 'model', 'feature_InceptionV3_{}.h5'.format(feature_date))
# Not used: feature_InceptionResNetV2_20171028.h5

feature_files = [feature_cgg16, feature_cgg19, feature_resnet50, feature_xception, feature_inception]
for filename in feature_files:
    with h5py.File(filename, 'r') as h:
        x_train.append(np.array(h['train']))
        x_test.append(np.array(h['test']))
        file_labels = np.array(h['train_labels'])
    if y_train is None:
        y_train = file_labels
    elif not np.array_equal(y_train, file_labels):
        # Concatenating features row by row is only meaningful when every
        # file stores its samples in the same order; differing labels prove
        # that they do not, so stop instead of training on misaligned rows.
        raise ValueError(
            "'train_labels' in %s differ from those in %s; "
            "feature rows are not aligned across files" % (filename, feature_files[0])
        )

x_train = np.concatenate(x_train, axis=-1)
x_test = np.concatenate(x_test, axis=-1)

print(x_train.shape)
print(x_train.shape[1:])
print(len(y_train))
print(x_test.shape)
In [29]:
from sklearn.utils import shuffle
# Shuffle features and labels in unison. The explicit random_state makes the
# cell reproducible on its own instead of depending on whatever state the
# global NumPy RNG is in when the cell happens to be executed.
x_train, y_train = shuffle(x_train, y_train, random_state=2017)
In [30]:
from sklearn.model_selection import train_test_split
# Hold out 5% of the samples as a validation split (fixed seed), then report
# the shape of each resulting array.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.05, random_state=2017
)
for split_array in (x_train, y_train, x_val, y_val):
    print(split_array.shape)
In [31]:
from keras.utils.np_utils import to_categorical
# One-hot conversion is intentionally left disabled; the labels are used as loaded.
# y_train = to_categorical(y_train)
# y_val = to_categorical(y_val)
for label_array in (y_train, y_val):
    print(label_array.shape)
In [32]:
import xgboost as xgb
from sklearn.metrics import accuracy_score
In [43]:
%%time
# Wrap each split in a DMatrix; the test matrix is built without labels.
xg_train = xgb.DMatrix(x_train, label=y_train)
xg_val = xgb.DMatrix(x_val, label=y_val)
xg_test = xgb.DMatrix(x_test)

# Booster parameters.
# NOTE(review): newer xgboost releases replace 'silent' with 'verbosity' -
# confirm against the installed version.
param = {
    'objective': 'multi:softmax',  # softmax multi-class classification
    'eta': 0.1,                    # learning rate (step-size shrinkage)
    'max_depth': 50,
    'silent': 1,
    'nthread': 4,
    'num_class': 120,
}

# Both splits are passed as the evaluation list for training.
watchlist = [(xg_train, 'train'), (xg_val, 'val')]
num_round = 5
bst = xgb.train(param, xg_train, num_round, watchlist)
In [44]:
# Save the softmax booster inside model_path (the directory configured for
# models at the top of the notebook) rather than the bare working directory.
if not os.path.isdir(model_path):
    os.makedirs(model_path)
model_name = os.path.join(model_path, run_name + '.bin')
bst.save_model(model_name)
In [45]:
# Create an empty booster, then restore the model that was just saved.
booster_params = {'nthread': 4}
bst0 = xgb.Booster(booster_params)
bst0.load_model(model_name)
In [49]:
# Predict the validation split with the reloaded booster and show a sample.
y_pred = bst0.predict(xg_val)
print(y_pred.shape)
print(y_pred[:5])
In [50]:
# do the same thing again, but output probabilities
param['objective'] = 'multi:softprob'
bst1 = xgb.train(param, xg_train, num_round, watchlist)
# Note: this convention has been changed since xgboost-unity
# get prediction, this is in 1D array, need reshape to (ndata, nclass)
# pred_prob = bst0.predict(xg_val).reshape(test_Y.shape[0], 6)
# pred_label = np.argmax(pred_prob, axis=1)
# error_rate = np.sum(pred_label != test_Y) / test_Y.shape[0]
# print('Test error using softprob = {}'.format(error_rate))
In [52]:
# Save the softprob booster inside model_path (the directory configured for
# models at the top of the notebook) rather than the bare working directory.
if not os.path.isdir(model_path):
    os.makedirs(model_path)
model_name = os.path.join(model_path, run_name + '_prob.bin')
bst1.save_model(model_name)
In [53]:
# Create an empty booster, then restore the model file named by model_name.
# NOTE(review): this rebinds bst0, which an earlier cell loaded from another
# model file, while the next cell predicts with bst1 - confirm whether the
# reloaded model was meant to be used there.
booster_params = {'nthread': 4}
bst0 = xgb.Booster(booster_params)
bst0.load_model(model_name)
In [51]:
# Predict the validation split with the softprob booster and show a sample.
y_pred = bst1.predict(xg_val)
print(y_pred.shape)
print(y_pred[:5])
In [ ]:
In [ ]:
In [ ]:
# `final_acc` was never assigned anywhere in this notebook, so this cell raised
# NameError on a fresh kernel. Compute it from the held-out validation split.
# The reshape yields (ndata, nclass) whether xgboost returns the probabilities
# as a 2D array or as a flat 1D array.
# Assumes y_val holds integer class indices - TODO confirm.
val_prob = bst1.predict(xg_val).reshape(len(y_val), -1)
final_acc = accuracy_score(y_val, np.argmax(val_prob, axis=1))
print('final_acc: %.4f' % final_acc)

# Tag the run name with the accuracy as a zero-padded integer (0.8123 -> '8123').
run_name0 = run_name + '_' + str(int(final_acc*10000)).zfill(4)
In [ ]:
In [ ]:
# Used to load model directly and skip train
# import os
# from keras.models import load_model
# cwd = os.getcwd()
# model = load_model(os.path.join(cwd, 'model', 'Dog_Breed_Identification_Train_20171024_155154.h5'))
In [ ]:
# Class probabilities for the test set. The previous call used `model`, which
# is never defined in this notebook; the trained softprob booster (bst1) and
# the test DMatrix (xg_test) are what this notebook actually provides.
# The reshape yields (n_samples, n_classes) whether xgboost returns a 2D array
# or a flat 1D array.
y_pred = bst1.predict(xg_test).reshape(x_test.shape[0], -1)
print(y_pred.shape)
In [ ]:
# print(y_pred[:10])
# y_pred = np.clip(y_pred, 0.005, 0.995)
# print(y_pred[:10])
In [ ]:
# List the entries of input/data_test/test and show the first ten.
test_dir = os.path.join(cwd, 'input', 'data_test', 'test')
files = os.listdir(test_dir)
print(files[:10])
In [ ]:
# Re-read input/labels.csv from under the current working directory and preview it.
cwd = os.getcwd()
labels_csv = os.path.join(cwd, 'input', 'labels.csv')
df = pd.read_csv(labels_csv)
print('lables amount: %d' % len(df))
df.head()
In [ ]:
# Build the breed <-> class-index lookup tables.
n = len(df)
breed = set(df['breed'])
n_class = len(breed)
# Number the breeds in sorted order. Iterating the raw set gives an arbitrary
# order, so the mappings could change from one kernel session to the next.
breed_sorted = sorted(breed)
class_to_num = {name: idx for idx, name in enumerate(breed_sorted)}
num_to_class = dict(enumerate(breed_sorted))
print(breed)
In [ ]:
# Load the submission template. The path is built with os.path.join, like the
# other input files in this notebook, so it does not depend on Windows-style
# backslash separators.
df2 = pd.read_csv(os.path.join(cwd, 'input', 'sample_submission.csv'))
n_test = len(df2)
print(df2.shape)
In [ ]:
# Fill the submission template with the predictions and write it to disk.
# Every column after the first receives one column of y_pred; the expected
# width is taken from the template instead of a hard-coded class count.
# NOTE(review): assumes the rows and class columns of y_pred are in the same
# order as the template's rows and columns - confirm against the
# feature-extraction step.
n_prob_cols = df2.shape[1] - 1
if y_pred.shape != (len(df2), n_prob_cols):
    raise ValueError(
        'y_pred has shape %s, expected %s to match the submission template'
        % (y_pred.shape, (len(df2), n_prob_cols))
    )
df2.iloc[:, 1:] = y_pred

if not os.path.isdir(output_path):
    os.makedirs(output_path)
pred_file = os.path.join(output_path, 'pred_' + run_name0 + '.csv')
df2.to_csv(pred_file, index=False)
In [ ]:
In [ ]:
# Echo the accuracy-tagged run name, then a completion marker.
for message in (run_name0, 'Done !'):
    print(message)
In [ ]: